Question 1

# Question 1: load the label statistics file, keep only the rows where both
# `type` and `level` equal 1, and print the sum of their `volume` column.
kirb21 <- readFileList(
  "/cloud/project/assignment3/kirby_3_1_ax_283Labels_M2_corrected_stats.txt"
)
# Comma-separated conditions in filter() are combined with a logical AND.
kirb21_11 <- kirb21 %>%
  filter(type == 1, level == 1)
with(kirb21_11, sum(volume))
## [1] 1378295

Question 2

# Question 2: interactive bar charts of the class survey, one counting
# students per Year and one counting students per Program.

# The first line of the file holds the column names.
class <- read.table(
  "/cloud/project/assignment3/classInterests.txt",
  header = TRUE
)

# Fix the level order explicitly so the bars follow academic progression
# (the previous order listed "Junior" before "Sophomore").
class$Year <- factor(
  class$Year,
  levels = c("Sophomore", "Junior", "Senior", "Master's", "PhD")
)

# Count of students per year. The fill legend duplicates the x axis, so it is
# suppressed; `guides(fill = FALSE)` is deprecated in favour of "none".
ggplotly(
  ggplot(class, aes(x = Year, fill = Year)) +
    geom_bar() +
    theme_bw() +
    guides(fill = "none"),
  tooltip = c("y", "x")
)

# Count of students per program; x labels are rotated 30 degrees.
ggplotly(
  ggplot(class, aes(x = Program, fill = Program)) +
    geom_bar() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 30, vjust = 0.5)) +
    guides(fill = "none"),
  tooltip = c("y", "x")
)

Question 3

# Question 3: interactive mosaic plot of Program against Year.

# Contingency table over the columns of `class`. It is not referenced by the
# plot below, which is built from the raw `class` data frame.
class_mo <- table(class)

(
  ggplot(data = class) +
    geom_mosaic(aes(x = product(Program, Year), fill = Program)) +
    ggtitle("Mosaic plot for program and year") +
    xlab("Year") +
    ylab("Program") +
    theme(
      # Centre the title; hide the y-axis title and tick labels.
      plot.title = element_text(hjust = 0.5),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.text.x = element_text(angle = 30, vjust = 0.6, hjust = 0.8)
    )
) %>%
  ggplotly()

Question 4

# Question 4: centre the expression data by column and then by row.
exprs <- read.csv("/cloud/project/assignment3/GSE5859_exprs.csv")

# Column `X` holds the row identifiers: move it into the row names and keep
# the remaining columns as a matrix (drop = FALSE guards the one-column case).
rownames(exprs) <- exprs$X
exprs <- as.matrix(exprs[, -1, drop = FALSE])

# Subtract each column's mean, then each row's mean of the column-centred
# values. sweep() keeps the original rows-by-columns orientation, whereas
# apply(m, 1, f) with a vector-valued f returns its result transposed.
exprs <- sweep(exprs, 2, colMeans(exprs))
exprs <- sweep(exprs, 1, rowMeans(exprs))

Question 5

# Question 5: interactive line chart of spending per year, one line per
# Location, with the "United States" row group excluded.
health <- read.csv("/cloud/project/assignment3/healthcare-spending.csv")

# Reshape every column except the first into (Year, Spend) pairs.
health_long <- health %>%
  pivot_longer(
    cols = colnames(health)[-1],
    names_to = "Year",
    values_to = "Spend"
  ) %>%
  mutate(
    # Keep characters 2-5 of the former column name as the year label
    # (presumably a four-digit year after a one-character prefix -- confirm
    # against the CSV header).
    Year = factor(substr(Year, 2, 5)),
    Spend = as.numeric(Spend)
  ) %>%
  filter(Location != "United States")

(
  ggplot(
    health_long,
    aes(x = Year, y = Spend, group = Location, colour = Location)
  ) +
    geom_line() +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5),
      legend.position = "bottom"
    )
) %>%
  ggplotly(tooltip = c("x", "y", "colour"))

Question 6

# Question 6: interactive bar chart of the mean spending per Location across
# all remaining columns, with "United States" excluded.
# Relies on `health` as read from healthcare-spending.csv earlier in the script.
location <- health$Location

# NOTE: this overwrites `health` with its first column removed, so re-running
# this section without re-reading the CSV would drop one more column.
# drop = FALSE keeps a data frame even if only one column remains.
health <- health[, -1, drop = FALSE]

# Row-wise mean over the remaining columns; rowMeans() is the vectorised
# equivalent of apply(health, 1, mean).
mean_spend <- rowMeans(health)

health_mean <- data.frame(Location = location, mean = mean_spend) %>%
  subset(Location != "United States")

ggplotly(
  ggplot(health_mean, aes(x = Location, y = mean, fill = Location)) +
    geom_col() +
    labs(y = "Healthcare average spending") +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5),
      legend.position = "none"
    ),
  tooltip = c("y", "x")
)